import json
import re
import datetime
import bs4
import requests

from getContent import getContent


def list_to_json(dic1_list, path="../data/BendibaoNews.json"):
    """Write *dic1_list* to a UTF-8 JSON file, replacing any previous content.

    The data is serialized with a 2-space indent and ``ensure_ascii=False`` so
    non-ASCII text (e.g. Chinese titles) is stored verbatim rather than as
    ``\\uXXXX`` escapes.

    Args:
        dic1_list: JSON-serializable object to store (the scraper passes a
            list of dicts).
        path: Destination file. Defaults to the location the scraper has
            always used; it is resolved relative to the current working
            directory.

    Returns:
        None.

    Raises:
        TypeError: If *dic1_list* contains a value ``json`` cannot serialize.
        OSError: If the destination cannot be opened for writing.
    """
    # Serialize BEFORE opening the file: opening in "w" mode truncates it, so
    # a serialization error after the open would wipe the existing data.
    payload = json.dumps(dic1_list, indent=2, ensure_ascii=False)
    with open(path, "w", encoding="utf-8") as file:  # write mode: overwrites
        file.write(payload)
    return None


def _parse_news_item(item):
    """Pull the link, title, image URL and date out of one ``<li>`` entry.

    Args:
        item: A BeautifulSoup tag for a single list entry.

    Returns:
        A ``(link, title, img_url, date)`` tuple, where ``date`` is the first
        10 characters of the date text, or ``None`` when the entry lacks any
        of the expected tags/attributes.
    """
    # ":scope" anchors each selector at this <li>, mirroring the original
    # "li:nth-child(i) > ..." document-level selectors.
    link_tags = item.select(':scope > a')
    img_tags = item.select(':scope > a > div.news-logo > img')
    date_tags = item.select(':scope > div.tagtime > span.date')
    try:
        link = link_tags[0]['href']
        title = img_tags[0]['alt']
        img_url = img_tags[0]['data-original']
        date_text = date_tags[0].text
    except (IndexError, KeyError):
        # Missing tag (IndexError) or missing attribute (KeyError): this entry
        # does not have the expected news layout, so the caller skips it.
        return None
    return link, title, img_url, date_text[0:10]


def get_data():
    """Scrape the news list from the bendibao front page and save it as JSON.

    Fetches ``http://bj.bendibao.com/``, walks every ``<li>`` under
    ``#tab_blocks > ul:nth-child(6)``, and for each well-formed entry builds a
    dict with the keys ``title``, ``link``, ``time``, ``origin``, ``imgUrl``
    and ``content`` (``content`` is whatever ``getContent`` returns for the
    entry's link). Entries missing the expected tags or attributes are
    skipped. The resulting list is handed to ``list_to_json``, even when it
    is empty.

    Returns:
        True if the page answered with HTTP 200 and the list was written,
        False if the page returned any other status code.

    Raises:
        requests.RequestException: On connection failures or if the server
            does not respond within the timeout.
    """
    url = "http://bj.bendibao.com/"
    res = requests.get(
        url=url,
        headers={
            'User-Agent': 'Mozilla/5.0(Macintosh;lnterl Mac OS X 10_14_6) AppleWebKit/537.36(KHTML,like Gecko)Chrome/87.0.4280.88 Safari/537.36'
        },
        # Without a timeout requests waits forever on an unresponsive server.
        timeout=10,
    )
    res.encoding = res.apparent_encoding
    if res.status_code != 200:
        print('无法获取页面')
        return False

    soup = bs4.BeautifulSoup(res.text, 'html.parser')
    dic_list = []
    # Selector obtained via the browser's "copy selector" on the news list.
    # Iterating the matched <li> tags directly visits every entry, including
    # the last one, which the earlier 1-based range(1, n) loop never reached.
    for item in soup.select('#tab_blocks > ul:nth-child(6) > li'):
        parsed = _parse_news_item(item)
        if parsed is None:
            continue
        link, title, img_url, date = parsed
        content = getContent(url=link)
        dic_list.append(
            dict(
                title=title,
                link=link,
                time=date,
                origin="北京本地宝",
                imgUrl=img_url,
                content=content,
            )
        )
    list_to_json(dic_list)
    return True


if __name__ == '__main__':
    # Script entry point: run the scraper once and print its boolean result.
    succeeded = get_data()
    print(succeeded)
